knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE, fig.align = 'center') knitr::opts_knit$set(root.dir = "/mnt/raid5/Personal/tangchao/project/Nanopore/BarcodeDecomplex")
library(data.table) library(ggplot2) library(parallel) library(caret) library(doParallel) library(pbapply) library(patchwork) library(multiROC) library(pROC) library(cowplot) library(dummies) library(yardstick)
load("./analysis/11.Classifiers/01.ModelingData/Barcode_4.RData") set.seed(123) TestReads <- TestReads[, .SD[sample(.N, 10000), ], Barcode] TestData <- TestData[TestReads$read, ]
true_label1 <- dummies::dummy(TestReads$Class, sep = " ") true_label1 <- data.frame(true_label1) colnames(true_label1) <- gsub("Class.", "", colnames(true_label1)) colnames(true_label1) <- gsub("RTA.", "RTA-", colnames(true_label1)) colnames(true_label1) <- paste0(colnames(true_label1), "_true")
RF_Fit1 <- readRDS("./analysis/12.AlgorithmComparison/Version1/01.Models/RF_B4.Rds") pred_RF0 <- predict(RF_Fit1, TestData) pred_RF1 <- predict(RF_Fit1, TestData, type = "prob") pred_RF0 <- data.table(pred = pred_RF0, Prob = apply(pred_RF1, 1, max)) pred_RF0$true <- TestData$Class colnames(pred_RF1) <- paste0(colnames(pred_RF1), "_pred_RF")
final_RF1 <- cbind(true_label1, pred_RF1) roc_RF1 <- multi_roc(final_RF1) pr_RF1 <- multi_pr(final_RF1) plot_roc_RF1 <- as.data.table(plot_roc_data(roc_RF1)) plot_pr_RF1 <- as.data.table(plot_pr_data(pr_RF1))
pred_RF2 <- cbind(obs = TestData$Class, predict(RF_Fit1, TestData, type = "prob")) pr_auc(pred_RF2, obs, `RTA-21`:`RTA-40`) average_precision(pred_RF2, obs, `RTA-21`:`RTA-40`)
AdaBoost_Fit1 <- readRDS("./analysis/12.AlgorithmComparison/Version1/01.Models/AdaBoost_B4.Rds") pred_AdaBoost0 <- predict(AdaBoost_Fit1, TestData) pred_AdaBoost1 <- predict(AdaBoost_Fit1, TestData, type = "prob") pred_AdaBoost0 <- data.table(pred = pred_AdaBoost0, Prob = apply(pred_AdaBoost1, 1, max)) pred_AdaBoost0$true <- TestData$Class colnames(pred_AdaBoost1) <- paste0(colnames(pred_AdaBoost1), "_pred_AdaBoost")
pred_AdaBoost2 <- cbind(obs = TestData$Class, predict(AdaBoost_Fit1, TestData, type = "prob")) pr_auc(pred_AdaBoost2, obs, `RTA-21`:`RTA-40`) average_precision(pred_AdaBoost2, obs, `RTA-21`:`RTA-40`)
final_AdaBoost1 <- cbind(true_label1, pred_AdaBoost1) roc_AdaBoost1 <- multi_roc(final_AdaBoost1) pr_AdaBoost1 <- multi_pr(final_AdaBoost1) plot_roc_AdaBoost1 <- as.data.table(plot_roc_data(roc_AdaBoost1)) plot_pr_AdaBoost1 <- as.data.table(plot_pr_data(pr_AdaBoost1))
CART_Fit1 <- readRDS("./analysis/12.AlgorithmComparison/Version1/01.Models/CART_B4.Rds") pred_CART0 <- predict(CART_Fit1, TestData) pred_CART1 <- predict(CART_Fit1, TestData, type = "prob") pred_CART0 <- data.table(pred = pred_CART0, Prob = apply(pred_CART1, 1, max)) pred_CART0$true <- TestData$Class colnames(pred_CART1) <- paste0(colnames(pred_CART1), "_pred_CART")
pred_CART2 <- cbind(obs = TestData$Class, predict(CART_Fit1, TestData, type = "prob")) pr_auc(pred_CART2, obs, `RTA-21`:`RTA-40`) average_precision(pred_CART2, obs, `RTA-21`:`RTA-40`)
final_CART1 <- cbind(true_label1, pred_CART1) roc_CART1 <- multi_roc(final_CART1) pr_CART1 <- multi_pr(final_CART1) plot_roc_CART1 <- as.data.table(plot_roc_data(roc_CART1)) plot_pr_CART1 <- as.data.table(plot_pr_data(pr_CART1))
KNN_Fit1 <- readRDS("./analysis/12.AlgorithmComparison/Version1/01.Models/KNN_B4.Rds") pred_KNN0 <- predict(KNN_Fit1, TestData) pred_KNN1 <- predict(KNN_Fit1, TestData, type = "prob") pred_KNN0 <- data.table(pred = pred_KNN0, Prob = apply(pred_KNN1, 1, max)) pred_KNN0$true <- TestData$Class colnames(pred_KNN1) <- paste0(colnames(pred_KNN1), "_pred_KNN")
pred_KNN2 <- cbind(obs = TestData$Class, predict(KNN_Fit1, TestData, type = "prob")) pr_auc(pred_KNN2, obs, `RTA-21`:`RTA-40`) average_precision(pred_KNN2, obs, `RTA-21`:`RTA-40`)
final_KNN1 <- cbind(true_label1, pred_KNN1) roc_KNN1 <- multi_roc(final_KNN1) pr_KNN1 <- multi_pr(final_KNN1) plot_roc_KNN1 <- as.data.table(plot_roc_data(roc_KNN1)) plot_pr_KNN1 <- as.data.table(plot_pr_data(pr_KNN1))
NB_Fit1 <- readRDS("./analysis/12.AlgorithmComparison/Version1/01.Models/NB_B4.Rds") pred_NB0 <- predict(NB_Fit1, TestData) pred_NB1 <- predict(NB_Fit1, TestData, type = "prob") pred_NB0 <- data.table(pred = pred_NB0, Prob = apply(pred_NB1, 1, max)) pred_NB0$true <- TestData$Class colnames(pred_NB1) <- paste0(colnames(pred_NB1), "_pred_NB")
pred_NB2 <- cbind(obs = TestData$Class, predict(NB_Fit1, TestData, type = "prob")) pr_auc(pred_NB2, obs, `RTA-21`:`RTA-40`) average_precision(pred_NB2, obs, `RTA-21`:`RTA-40`)
final_NB1 <- cbind(true_label1, pred_NB1) roc_NB1 <- multi_roc(final_NB1) pr_NB1 <- multi_pr(final_NB1) plot_roc_NB1 <- as.data.table(plot_roc_data(roc_NB1)) plot_pr_NB1 <- as.data.table(plot_pr_data(pr_NB1))
NNet_Fit1 <- readRDS("./analysis/12.AlgorithmComparison/Version1/01.Models/NNet_B4.Rds") pred_NNet0 <- predict(NNet_Fit1, TestData) pred_NNet1 <- predict(NNet_Fit1, TestData, type = "prob") pred_NNet0 <- data.table(pred = pred_NNet0, Prob = apply(pred_NNet1, 1, max)) pred_NNet0$true <- TestData$Class colnames(pred_NNet1) <- paste0(colnames(pred_NNet1), "_pred_NNet")
pred_NNet2 <- cbind(obs = TestData$Class, predict(NNet_Fit1, TestData, type = "prob")) pr_auc(pred_NNet2, obs, `RTA-21`:`RTA-40`) average_precision(pred_NNet2, obs, `RTA-21`:`RTA-40`)
final_NNet1 <- cbind(true_label1, pred_NNet1) roc_NNet1 <- multi_roc(final_NNet1) pr_NNet1 <- multi_pr(final_NNet1) plot_roc_NNet1 <- as.data.table(plot_roc_data(roc_NNet1)) plot_pr_NNet1 <- as.data.table(plot_pr_data(pr_NNet1))
plot_roc_List <- list(RF = plot_roc_RF1, NB = plot_roc_NB1, NNet = plot_roc_NNet1, KNN = plot_roc_KNN1, CART = plot_roc_CART1, AdaBoost = plot_roc_AdaBoost1) plot_pr_List <- list(RF = plot_pr_RF1, NB = plot_pr_NB1, NNet = plot_pr_NNet1, KNN = plot_pr_KNN1, CART = plot_pr_CART1, AdaBoost = plot_pr_AdaBoost1)
saveRDS(plot_roc_List, "./analysis/12.AlgorithmComparison/Version1/02.Compare/Testset_plot_roc_List.Rds") saveRDS(plot_pr_List, "./analysis/12.AlgorithmComparison/Version1/02.Compare/Testset_plot_pr_List.Rds")
pred0_List <- list(RF = pred_RF0, NB = pred_NB0, NNet = pred_NNet0, KNN = pred_KNN0, CART = pred_CART0, AdaBoost = pred_AdaBoost0) pred1_List <- list(RF = pred_RF1, NB = pred_NB1, NNet = pred_NNet1, KNN = pred_KNN1, CART = pred_CART1, AdaBoost = pred_AdaBoost1) saveRDS(pred0_List, "./analysis/12.AlgorithmComparison/Version1/02.Compare/Testset_pred0_List.Rds") saveRDS(pred1_List, "./analysis/12.AlgorithmComparison/Version1/02.Compare/Testset_pred1_List.Rds")
confMat <- lapply(pred0_List, function(x) { cM <- x[, confusionMatrix(pred, true)] data.table(Accuracy = cM$overall[1], Sensitivity = apply(cM$byClass, 2, mean)[1], Specificity = apply(cM$byClass, 2, mean)[2], Precision = apply(cM$byClass, 2, mean)[5], Recall = apply(cM$byClass, 2, mean)[6], F1 = apply(cM$byClass, 2, mean, na.rm = T)[7]) }) confMat <- data.table(Model = names(pred0_List), do.call(rbind, confMat)) write.csv(confMat, "./analysis/12.AlgorithmComparison/Version1/02.Compare/Testset_confusionMatrix.csv")
MoldeCols <- ggsci::pal_igv()(6) names(MoldeCols) <- c("RF", "NB", "NNet", "KNN", "CART", "AdaBoost")
ROC_AUCs <- lapply(plot_roc_List, function(x) { data.table(Macro = x[Group == "Macro", unique(AUC)], Micro = x[Group == "Micro", unique(AUC)]) }) ROC_AUCs <- data.table(Model = names(plot_roc_List), Curve = "ROC", do.call(rbind, ROC_AUCs)) PR_AUCs <- lapply(plot_pr_List, function(x) { data.table(Macro = x[Group == "Macro", unique(AUC)], Micro = x[Group == "Micro", unique(AUC)]) }) PR_AUCs <- data.table(Model = names(plot_roc_List), Curve = "PR", do.call(rbind, PR_AUCs)) AUCs <- rbind(ROC_AUCs, PR_AUCs)
roc_mat <- lapply(plot_roc_List, function(x) { set.seed(123) x[Group == "Macro"][sort(sample(.N, 1000))] }) roc_mat <- data.table(Model = rep(names(plot_roc_List), each = 1000), do.call(rbind, roc_mat))
od <- AUCs[Curve == "ROC"][order(Macro, decreasing = T), as.character(Model)] roc_mat[, Model := factor(Model, levels = od)] AUCs[, Model := factor(Model, levels = od)] setkey(AUCs, Curve, Model)
ggplot(roc_mat, aes(x = 1 - Specificity, y = Sensitivity, colour = Model)) + geom_path(size = 1) + geom_text(data = AUCs[Curve == "ROC", ], mapping = aes(x = 0.75, y = 6:1/10, label = paste0("AUC = ", round(Macro, 4)))) + theme_bw(base_size = 15) + scale_color_manual(values = MoldeCols[od]) + lims(x = c(0, 1), y = c(0, 1)) ggsave("./analysis/12.AlgorithmComparison/Version1/02.Compare/Test_ROCs.pdf", width = 5.5, height = 4)
pr_mat <- lapply(plot_pr_List, function(x) { set.seed(123) x[Group == "Macro"][sort(sample(.N, 1000))] }) pr_mat <- data.table(Model = rep(names(plot_pr_List), each = 1000), do.call(rbind, pr_mat))
od <- AUCs[Curve == "PR"][order(Macro, decreasing = T), as.character(Model)] pr_mat[, Model := factor(Model, levels = od)] AUCs[, Model := factor(Model, levels = od)] setkey(AUCs, Curve, Model)
ggplot(pr_mat, aes(x = Recall, y = Precision, colour = Model)) + geom_path(size = 1) + geom_text(data = AUCs[Curve == "PR", ], mapping = aes(x = 0.25, y = 6:1/10, label = paste0("AUC = ", round(Macro, 4)))) + theme_bw(base_size = 15) + scale_color_manual(values = MoldeCols[od]) + lims(x = c(0, 1), y = c(0, 1)) ggsave("./analysis/12.AlgorithmComparison/Version1/02.Compare/Test_PRCs.pdf", width = 5.5, height = 4)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.